# -*- coding: UTF-8 -*-
import re
from business.spider import SpiderHelp
from dao.mysql import Mysql
import time

url = "http://www.chinabidding.com/search/proj.htm"
# Detail-page links contain "bidDetail" somewhere in the href.
pattern = re.compile(r'(\w)*bidDetail(\w)*')
spider = SpiderHelp()
######################
# Edit here: info class code of the bid type to crawl.
# Known codes (from a previous revision of this script):
#   1015 = tender notice, 1016 = tender change notice,
#   1017 = evaluation result publicity, 1018 = award result notice.
bidType = "0105"
# pubDate filter: 2 = last 3 days. The same value MUST be used for both
# the page-count probe and the per-page fetches below, otherwise the
# page count is computed against a different result set than the one
# actually crawled (the original probed with pubDate=1 but crawled
# with pubDate=2).
pubDate = 2
payload = {"poClass": "BidNotice", 'infoClassCodes': bidType, 'currentPage': 1, 'pubDate': pubDate}
soup_page = spider.beautifulsoup_bypost(url, payload)
# Read the maximum page number from the pager form: the second-to-last
# <a> in the pager holds the last page's number.
page_element = soup_page.find("form", id="pagerSubmitForm").find_all("a")
page_max = page_element[-2].text

for i in range(1, int(page_max) + 1):
    # Keep the fetch payload identical to the probe payload (except the
    # page number) so the crawled set matches the counted set.
    payload = {"poClass": "BidNotice", 'infoClassCodes': bidType, 'currentPage': i, 'pubDate': pubDate}
    soup_links = spider.beautifulsoup_bypost(url, payload)
    print("第", i, "页,共", page_max, "页")
    for link in spider.target_links(soup_links, pattern):
        # One Mysql handle per link; dispose(1) below commits/releases it.
        mysql = Mysql()
        # NOTE(review): the list-page payload is re-posted to the detail
        # URL; the detail page presumably ignores it — confirm.
        soup_body = spider.beautifulsoup_bypost(link, payload)
        # Parameterized insert of the raw page HTML.
        # NOTE(review): 'Tpye-' looks like a typo for 'Type-', but the
        # value is persisted — fix only together with downstream readers.
        sql = "INSERT INTO `raw_source` (`origin_url`, `origin_html`,`origin_info`, `status`, `site_id`, `gmt_create`)" \
              "VALUES (%s, %s,'Tpye-招标公告', 0, '2', sysdate())"
        val = (link, soup_body)
        row = mysql.insertOne(sql, val)
        mysql.dispose(1)
        print(link, "执行完成,行号：", row)
    # Be polite to the server between list pages.
    time.sleep(5)
